3 Sequencing data

3.1 Sequencing depth

# Overall sequencing output before fastp filtering: the total and the
# mean ± sd of `bases_pre_fastp`, reported in gigabases and in reads.
# NOTE(review): reads are derived as bases / 300 -- presumably 2 x 150 bp
# read pairs; confirm against the sequencing setup.
all_data %>%
  summarise(
    total_bases = sum(bases_pre_fastp),
    mean_bases = mean(bases_pre_fastp),
    sd_bases = sd(bases_pre_fastp)
  ) %>%
  with(tibble(
    metric = c("Total GB", "Total reads", "Average GB", "Average reads"),
    # Mixing numbers and strings in c() coerces the whole column to character.
    value = c(
      round(total_bases / 1e9, 2),
      round(total_bases / 300, 2),
      paste0(round(mean_bases / 1e9, 2), "±", round(sd_bases / 1e9, 2)),
      paste0(round(mean_bases / 300, 0), "±", round(sd_bases / 300, 0))
    )
  )) %>%
  tt()
tinytable_22phca5lqtoxf6vp2bs4
metric value
Total GB 937.37
Total reads 3124578099
Average GB 5.18±2.46
Average reads 17262862±8195554
# Mean ± sd of post-fastp sequencing depth (gigabases) for every
# Taxon x Extraction combination, shown with one row per Taxon and one
# column per extraction method.
# NOTE(review): this table uses `bases_post_fastp`, whereas the depth plot
# in this section uses `bases_pre_fastp` -- confirm which one is intended.
all_data %>%
  group_by(Taxon, Extraction) %>%
  summarise(
    value = sprintf(
      "%.1f±%.1f",
      mean(bases_post_fastp / 1e9),
      sd(bases_post_fastp / 1e9)
    ),
    # Drop the leftover Taxon grouping so downstream steps get a plain
    # tibble and no regrouping message is emitted.
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = Extraction, values_from = value) %>%
  tt(caption = "Mean and standard deviation of sequencing depth (GB)")
tinytable_u5z5i4ahkbwpo2xh3ryy
Mean and standard deviation of sequencing depth (GB)
Taxon ZYMO DREX EHEX
Amphibian 4.0±1.6 3.2±2.1 4.7±0.3
Reptile 6.1±2.2 5.7±1.3 5.0±1.8
Mammal 5.4±3.2 4.6±2.0 3.8±2.2
Bird 3.9±1.8 4.2±2.4 3.1±1.9
Control 0.0±0.0 0.5±0.6 2.1±2.7
# Sequencing depth per library (pre-fastp bases converted to gigabases),
# one facet per Taxon: a grey boxplot per extraction method with the
# individual libraries jittered on top and coloured by Species.
all_data %>%
  transmute(
    Library,
    Species,
    Extraction,
    bases_pre_fastp = bases_pre_fastp / 1e9,
    Taxon
  ) %>%
  distinct() %>%  # keep a single row per library
  ggplot(aes(x = Extraction, y = bases_pre_fastp, color = Species, group = Extraction)) +
  # Outliers are hidden in the boxplot because every point is drawn by
  # geom_jitter() anyway.
  geom_boxplot(outlier.shape = NA, fill = "#f4f4f4", color = "#8c8c8c") +
  geom_jitter() +
  scale_color_manual(values = vertebrate_colors) +
  facet_grid(. ~ Taxon, scales = "free") +
  theme_minimal() +
  labs(y = "Sequencing depth (Gb)", x = "Extraction method")

# Linear mixed model testing whether post-fastp sequencing depth differs
# between extraction methods, with random intercepts for Sample and
# Species. Control rows are excluded. Fitted by maximum likelihood
# (REML = FALSE).
#
# The response is rescaled from bases to gigabases before fitting: on the
# raw ~1e9 scale the Satterthwaite degrees of freedom were numerically
# unstable (values such as 4.4e+21 were reported for the Extraction
# terms). Estimates and standard deviations are therefore in Gb.
all_data %>%
  filter(Taxon != "Control") %>%
  mutate(depth_gb = bases_post_fastp / 1e9) %>%
  lmerTest::lmer(
    depth_gb ~ Extraction + (1 | Sample) + (1 | Species),
    data = .,
    REML = FALSE
  ) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_ruzsab9tfj2udsw4p1ql
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 4808156823 382290022 12.577249 1.711860e+01 4.477658e-10
fixed NA ExtractionDREX -485437083 283030980 -1.715138 4.392525e+21 8.632000e-02
fixed NA ExtractionEHEX -474572486 284400639 -1.668676 1.443430e+09 9.518163e-02
ran_pars Sample sd__(Intercept) 1407267695 NA NA NA NA
ran_pars Species sd__(Intercept) 508228832 NA NA NA NA
ran_pars Residual sd__Observation 1524168473 NA NA NA NA

3.2 Quality-filtering

# Mean ± sd of the percentage of bases retained by fastp
# (bases_post_fastp / bases_pre_fastp * 100) for every Taxon x Extraction
# combination, shown with one row per Taxon and one column per extraction
# method.
all_data %>%
  mutate(qf_bases = bases_post_fastp / bases_pre_fastp * 100) %>%
  group_by(Taxon, Extraction) %>%
  summarise(
    value = sprintf("%.1f±%.1f", mean(qf_bases), sd(qf_bases)),
    # Drop the leftover Taxon grouping so downstream steps get a plain
    # tibble and no regrouping message is emitted.
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = Extraction, values_from = value) %>%
  tt(caption = "Mean and standard deviation of quality-filtered proportion of reads")
tinytable_io3vh2vpezfhx7lx22uw
Mean and standard deviation of quality-filtered proportion of reads
Taxon ZYMO DREX EHEX
Amphibian 84.7±1.2 91.7±3.7 87.6±2.9
Reptile 89.9±6.3 90.5±7.1 88.3±7.2
Mammal 91.9±2.3 89.5±4.7 91.2±1.9
Bird 70.9±16.1 70.3±23.0 70.2±14.7
Control 3.3±2.3 9.8±11.5 27.5±3.4
# Percentage of bases surviving fastp quality filtering per library, one
# facet per Taxon: a grey boxplot per extraction method with the
# individual libraries jittered on top and coloured by Species.
all_data %>%
  transmute(
    Library,
    Species,
    Extraction,
    qf_bases = bases_post_fastp / bases_pre_fastp * 100,
    Taxon
  ) %>%
  distinct() %>%  # keep a single row per library
  ggplot(aes(x = Extraction, y = qf_bases, color = Species, group = Extraction)) +
  # Outliers are hidden in the boxplot because every point is drawn by
  # geom_jitter() anyway.
  geom_boxplot(outlier.shape = NA, fill = "#f4f4f4", color = "#8c8c8c") +
  geom_jitter() +
  scale_color_manual(values = vertebrate_colors) +
  facet_grid(. ~ Taxon, scales = "free") +
  theme_minimal() +
  labs(y = "High-quality data (%)", x = "Extraction method")

# Linear mixed model testing whether the percentage of bases retained by
# fastp differs between extraction methods, with random intercepts for
# Sample and Species. Control rows are excluded. Fitted by maximum
# likelihood (REML = FALSE).
all_data %>%
  filter(Taxon != "Control") %>%
  mutate(qf_bases = bases_post_fastp / bases_pre_fastp * 100) %>%
  lmerTest::lmer(
    qf_bases ~ Extraction + (1 | Sample) + (1 | Species),
    data = .,
    REML = FALSE
  ) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_5m1xqtmdipuzprxp5niz
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 83.056667 3.229677 25.7167105 12.92142 1.758335e-12
fixed NA ExtractionDREX 1.811218 1.262245 1.4349184 145.69071 1.534536e-01
fixed NA ExtractionEHEX 0.572778 1.268390 0.4515789 145.71392 6.522438e-01
ran_pars Sample sd__(Intercept) 7.418800 NA NA NA NA
ran_pars Species sd__(Intercept) 9.356829 NA NA NA NA
ran_pars Residual sd__Observation 6.797395 NA NA NA NA